 # Simulation Performance
## Show that ADP outperforms single digit tests

## Define an individual digit preference as a benford distribution where some digits
## are replaced with a preferred digit

digitpreference <- function(n){
    # For sample size n, start with benford numbers
    mantissa  = exp(runif(n)*log(10))
    power = sample(seq(3,7),n, replace = T)
    benford = floor(mantissa *(10^power))

    # Pick 2 digits to prefer, for the whole dataset for this creator
    x <- sample(0:9, 2)

    # With 20% probability for each obs, replace a random digit in the number
    # for a preferred digit
    for(i in 1:n){
      if(runif(1) < 0.2){
        # Turn number into string
        num = toString(benford[i])
        # Find position to replace
        pos = sample(1:nchar(num), 1)
        
        replace = sample(x, 1)
        
        substr(num, pos, pos) = toString(replace)
        benford[i] = num
      } 
    }
    return(benford)
}

nodigitpreference <- function(n){
    # For sample size n, create benford numbers
    mantissa  = exp(runif(n)*log(10))
    power = sample(seq(3,7),n, replace = T)
    benford = floor(mantissa *(10^power))

    return(benford)
}

## Each district generates its own data
# A,B,C have digit preferences
# D,E,F do not 

N <- 1000
districts_bad <- c("A", "B", "C")
districts_good <- c("D", "E", "F")

D = data.frame(matrix(ncol = 0, nrow = 0))

for(d in districts_bad){
  tmp <- data.frame(matrix(ncol = 0, nrow = N))
  tmp$Expense <- digitpreference(N) 
  tmp$District <- d 
  D<- rbind(D, tmp)
}

for(e in districts_good){
  tmp <- data.frame(matrix(ncol = 0, nrow = N))
  tmp$Expense <- nodigitpreference(N) 
  tmp$District <- e 
  D<- rbind(D, tmp)
}



#Compare our tests to single digit tests
#library(devtools)
#install_github("jlederluis/digitanalysis", force = TRUE)

library(digitanalysis)
library(ggplot2)

Data <- process_digit_data(raw_df = D, digit_columns = c('Expense'))

# For summarizing values
D$Expense2 <- as.numeric(D$Expense)

#######################################
# Plot toggle: turn on before plotting
plot_toggle = TRUE 

### Single digit tests 

# First digit test
first_digit = all_digits_test(digitdata = Data, data_columns = 'Expense', digit_places = 1, omit_05 = 0, break_out='District',
                              suppress_first_division_plots=TRUE, plot=plot_toggle)
first_digit$p_values
first_digit$plot$AllBreakout

# Second digit test
second_digit = all_digits_test(digitdata = Data, data_columns = 'Expense', digit_places = 2, omit_05 = 0, break_out='District',
                              suppress_first_division_plots=TRUE, plot=plot_toggle)
second_digit$p_values
second_digit$plot$AllBreakout


# Third digit test
third_digit = all_digits_test(digitdata = Data, data_columns = 'Expense', digit_places = 3, omit_05 = 0, break_out='District',
                              suppress_first_division_plots=TRUE, plot=plot_toggle)
third_digit$p_values
third_digit$plot$AllBreakout


#All digits test with expenditure
all_digits = all_digits_test(digitdata = Data, data_columns = 'Expense', skip_first_digit = FALSE, omit_05 = 0, break_out='District',
                            suppress_first_division_plots=TRUE, plot=plot_toggle)
all_digits$plots$AllBreakout
all_digits$p_values


#Last digits test with expenditure
last_digits = all_digits_test(digitdata = Data, data_columns = 'Expense', digit_places = -1, omit_05 = 0, break_out='District',
                            suppress_first_division_plots=TRUE, plot=plot_toggle)
last_digits$plots$AllBreakout
last_digits$p_values

## First two
first_two = all_digits_test(digitdata = Data, data_columns = 'Expense', digit_places = c(1,2), omit_05 = 0, break_out='District',
                              suppress_first_division_plots=TRUE, plot=plot_toggle)


### Make figs

pdf("FirstDigits.pdf")
first_digit$plot$AllBreakout
dev.off()

pdf("SecondDigits.pdf")
second_digit$plot$AllBreakout
dev.off()

pdf("ThirdDigits.pdf")
third_digit$plot$AllBreakout
dev.off()

pdf("LastDigits.pdf")
last_digits$plot$AllBreakout
dev.off()


pdf("AllDigits.pdf")
all_digits$plot$AllBreakout
dev.off()

pdf("FirstTwo.pdf")
first_two$plot$AllBreakout
dev.off()

# P-Value printout to produce final table


sink(file = "P-Values.txt")

print("First Digits")
first_digit$p_values

print("Second Digits")
second_digit$p_values

print("Third Digits")
third_digit$p_values

print("Last Digits")
last_digits$p_values

print("AllDigits p")
all_digits$p_values

print("AllDigits N")
all_digits$sample_sizes

print("First Two")
first_two$p_values

#tapply(D$Expense2, D$District, summary)

sink()

